Class 6 Multilevel Model

Data Import

(you don’t actually need to run this)

library(WDI)
library(tidyverse)
library(countrycode)

indicators <- list('gini' = 'SI.POV.GINI',
                'wlfp' = 'SL.TLF.ACTI.ZS'  ,# Labor Force Participation Rate (%), Female
                'gdp_pc_ppp' = "NY.GDP.PCAP.PP.KD",# "GDP per capita, PPP (constant 2005 international $)"
                'resource_rents' = 'NY.GDP.TOTL.RT.ZS'
                )  

wdi <- WDI(indicator=indicators,
          start = 2020,
          end = 2023
          )
wdi$country_id<- countrycode(wdi$iso3c, origin='iso3c', destination ='vdem')
wdi_filled<-wdi|>
  group_by(country)|>
  arrange(year)|>
  fill(c(resource_rents, gini, wlfp), .direction='downup')|>
  slice_tail(n=1)


polregions<-c('Eastern Europe', #  (including German Democratic Republic, excluding the Caucasus)
              'Latin America and the Caribbean',
              'The Middle East and North Africa', # (including Israel and Türkiye, excluding Cyprus)
              'Sub-Saharan Africa',
              'Western Europe and North America', #  (including Cyprus, but excluding German Democratic Republic)
              'East Asia and the Pacific',
              'South and Central Asia' #(including the Caucasus)
              
              )

# Electoral system data 
elsystem<-vdemdata::vdem|>
  arrange(year)|>
  filter(year>=2010)|>
  group_by(country_name)|>
  fill(v2elparlel, .direction='down')|>
  slice_tail(n=1)|>
  select(country_name,v2elparlel)
  

vdem<-vdemdata::vdem|>
   arrange(year)|>
  filter(v2x_elecreg == 1)|>
  filter(year>=2020)|>
  group_by(country_name)|>
  select(starts_with('country'), v2lgfemleg, v2x_elecreg, v2lgqugen, v2lgqugent, e_regionpol_7C,
         e_fh_pr, e_fh_cl,e_fh_status)|>
  
  fill(everything(),.direction='down')|>
  slice_tail(n=1)|>
  ungroup()|>
  left_join(wdi_filled, by=join_by(country_id == country_id))|>
  select(-starts_with("iso"), -country_id, -country, -year)|>
  left_join(elsystem, by='country_name')|>
  mutate(region = factor(e_regionpol_7C, labels=polregions),
         log_gdp_pc_ppp = log(gdp_pc_ppp),
         fh_status = factor(e_fh_status, levels=c(1,2, 3), labels=c('Free', "Partly Free", "Not Free")),
         fh_status = fct_relevel(fh_status, "Not Free", "Partly Free", "Free"),
         free_dummy = factor(fh_status == "Free", labels=c("Not Free", "Free")),
         majoritarian_dummy = factor(v2elparlel ==0, labels=c("Not Majoritarian", "Majoritarian"))
         )

vdem<-set_variable_labels(vdem, 
                    'v2lgfemleg' = "Lower chamber % female legislators", 
                    'v2x_elecreg' = "", 
                    'v2lgqugen' = 'Lower chamber gender quota', 
                    'v2lgqugent' = 'Lower chamber gender quota threshold', 
                    'resource_rents' = 'Total natural resources rents (% of GDP)',
                    'gini'= 'Gini coefficient', 
                    'wlfp' = "Female labor force participation",
                    'log_gdp_pc_ppp' = 'Logged GDP per capita, PPP',
                    'region' ='Region (politico-geographic 7-category)',
                    'majoritarian_dummy' = 'Lower chamber electoral system'
                    )


vdem_cleaned<-vdem|>
  drop_na(v2lgfemleg, v2lgqugen , wlfp , resource_rents , log_gdp_pc_ppp, e_fh_pr, e_fh_cl)

#saveRDS(vdem_cleaned,file='vdem_cleaned.rds')
if(!file.exists('vdem_clean.rds')){
  download.file('https://github.com/Neilblund/GVPT728_Winter24/raw/refs/heads/main/Additional%20Code%20and%20Data/vdem_cleaned.rds',
                'vdem_clean.rds',
                mode='wb')

}

vdem_cleaned<-readRDS('vdem_clean.rds')

Describing the main relationship

In general, do countries with majoritarian electoral systems elect fewer women?

relationship <- ggplot(vdem_cleaned, aes(x = majoritarian_dummy, y = v2lgfemleg)) +
  geom_half_point(aes(color =  majoritarian_dummy), 
                  transformation = position_quasirandom(width = 0.1),
                  side = "l", size = 1, alpha = 0.5) +
  geom_half_boxplot(aes(fill = majoritarian_dummy), side = "r") + 
  
  guides(color = "none", fill = "none") +
  labs(x = "Lower House Electoral System", y = "% women in legislature") +
  theme_bw() +
  scale_fill_brewer(palette='Dark2') +
  scale_color_brewer(palette='Dark2')


relationship

However: might want to control for some additional factors that could influence this relationship.

  • Women’s Labor Force Participation

  • logged GDP

  • Whether a country has quotas guaranteeing a certain number of seats for women in the legislature.

data<-vdem_cleaned|>

  select(v2lgfemleg, wlfp, log_gdp_pc_ppp, v2lgqugen, majoritarian_dummy)

GGally::ggpairs(data, aes(color=majoritarian_dummy)) +
  theme_bw() +
  scale_color_brewer(palette='Dark2')  +
    scale_fill_brewer(palette='Dark2') 

Using OLS

model_0<-lm(v2lgfemleg ~ 
               majoritarian_dummy +
               wlfp + 
               log_gdp_pc_ppp +
               v2lgqugen,
               data=vdem_cleaned)



modelsummary(list("OLS" = model_0), 
              coef_rename=TRUE,
             coef_omit = "Intercept",
              estimate  = "{estimate}",  
             statistic = c("conf.int"),
             conf_level = .95,        
 note = "95% CI in brackets",
 gof_omit = 'F|RMSE|R2$|AIC|Log.Lik.',
             )
tinytable_k9hthh6cmcfafy4nwjtg
OLS
95% CI in brackets
Lower chamber electoral system [Majoritarian] -6.195
[-10.102, -2.289]
Female labor force participation 0.304
[0.132, 0.477]
Logged GDP per capita, PPP 1.316
[-0.288, 2.921]
Lower chamber gender quota 2.332
[1.140, 3.525]
Num.Obs. 157
R2 Adj. 0.229
BIC 1219.2

The effect of majoritarianism (relative to non-majoritarian electoral systems) is a estimated to be around a 6% decrease in the % women in the legislature. The presence of quotas, unsurprisingly, also has some impact, as does female labor force participation. While GDP may also play a role, the 95% confidence interval contains zero and is not statistically significant at conventional levels.

Fixed Effects

We might be concerned that this model fails to capture important variation. For instance: maybe cultural norms, colonial legacies, or simple geography play a major role in determining the outcome of interest?

ggplot(vdem_cleaned, aes(x=wlfp, y=v2lgfemleg)) + 
  geom_point(aes(color=majoritarian_dummy)) +
  theme_bw() +
  scale_color_brewer(palette='Dark2')  +
  scale_fill_brewer(palette='Dark2')  +
  labs(x="Women's Labor Force Participation",
       y="% women in the legislature",
       color = 'Electoral System'
       ) +
  facet_wrap(~region)

While we don’t have direct measures of a lot of these characteristics, we might try to account for them by including k-1 region-level dummy variables in the model. This would be the a fixed effects model:

model_1<-feols(v2lgfemleg ~ 
               majoritarian_dummy +
               wlfp + 
               log_gdp_pc_ppp +
               v2lgqugen | region,
               data=vdem_cleaned, 
               cluster ~ region)

model_1
OLS estimation, Dep. Var.: v2lgfemleg
Observations: 157
Fixed-effects: region: 7
Standard-errors: Clustered (region) 
                                Estimate Std. Error  t value  Pr(>|t|)    
majoritarian_dummyMajoritarian -5.449041   2.179653 -2.49996 0.0465309 *  
wlfp                            0.195182   0.082976  2.35227 0.0568819 .  
log_gdp_pc_ppp                  1.735662   1.109650  1.56415 0.1688154    
v2lgqugen                       2.129287   0.529949  4.01791 0.0069754 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 9.90766     Adj. R2: 0.307379
                Within R2: 0.175529
modelsummary(list("OLS" = model_0, "Region FE" = model_1), 
             coef_rename=TRUE,
             coef_omit = "Intercept",
             estimate  = "{estimate}",  
             statistic = c("conf.int"),
             conf_level = .95,        
             note = "95% CI in brackets",
             gof_omit = 'F|RMSE|R2$|AIC|Log.Lik.',
             )
tinytable_usistm6azfw9zvr62ve7
OLS Region FE
95% CI in brackets
Lower chamber electoral system [Majoritarian] -6.195 -5.449
[-10.102, -2.289] [-10.782, -0.116]
Female labor force participation 0.304 0.195
[0.132, 0.477] [-0.008, 0.398]
Logged GDP per capita, PPP 1.316 1.736
[-0.288, 2.921] [-0.980, 4.451]
Lower chamber gender quota 2.332 2.129
[1.140, 3.525] [0.833, 3.426]
Num.Obs. 157 157
R2 Adj. 0.229 0.307
R2 Within 0.176
R2 Within Adj. 0.153
BIC 1219.2 1221.3
Std.Errors by: region

Notably, the inclusion of fixed effects here doesn’t drastically alter our estimates, and we still find that majoritarian systems have about the same expected negative effect on the dependent variable that they did in our original linear model.

While the fixed effects terms are automatically left out of the model output here, we can view them by running:

fixef(model_1)
$region
       East Asia and the Pacific                   Eastern Europe 
                      -9.1597424                       -6.6165617 
 Latin America and the Caribbean           South and Central Asia 
                      -1.8319281                       -7.5443107 
              Sub-Saharan Africa The Middle East and North Africa 
                      -2.8207371                      -13.3610522 
Western Europe and North America 
                       0.9046372 

attr(,"class")
[1] "fixest.fixef" "list"        
attr(,"exponential")
[1] FALSE

We can think of these as the separate Y-intercept terms for each region in our data set.

fixef_slopes<-predict_response(model_1, terms=c('majoritarian_dummy','region'))|>
  plot(ci=FALSE, connect_lines=TRUE, show_data=TRUE, jitter=TRUE) +
  ggtitle("Fixed Effects") +
  scale_color_brewer(palette='Dark2')

fixef_slopes

Multilevel Model

While both the standard OLS model and the fixed effects model seem to work reasonably well here, there may be cases where a multilevel model is more appropriate. This could be for several reasons. For instance:

  • We might think that the fixed effect for some small regions are implausibly low or high. After all: if a region only has a handful of countries, we might find that region has a really high or really low value of the dependent variable by random chance. Since random effects pool estimates across levels of the random effect, a random effects model may be a better fit.

  • We might want to investigate a characteristic that doesn’t vary much or at all within regions. For instance: we might have a variable for colonial legacy, primary religion, or membership in a regional organization. Since some of these will vary very little within each region, these controls would be perfectly or near-perfectly co-linear with the fixed effects estimator.

To run a linear random effects model, we’ll use the lmer function and include a random effect term. The lme4 package provides a whole bunch of ways to specify complex hierarchical models, but if we have a single random effect term, this is pretty straightforward: we just add something like:

+ (1|random_effect_term)

…to our formula. So here’s how I can create a region-level random effect:

model_2<-lmer(v2lgfemleg ~ 
               majoritarian_dummy +
               wlfp + 
               log_gdp_pc_ppp +
               v2lgqugen +
               (1|region), data=vdem_cleaned)

We’ll take a look at the summary output:

summary(model_2)
Linear mixed model fit by REML ['lmerMod']
Formula: v2lgfemleg ~ majoritarian_dummy + wlfp + log_gdp_pc_ppp + v2lgqugen +  
    (1 | region)
   Data: vdem_cleaned

REML criterion at convergence: 1176.9

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-1.88705 -0.66728  0.04488  0.64984  2.88985 

Random effects:
 Groups   Name        Variance Std.Dev.
 region   (Intercept)  17.33    4.163  
 Residual             105.53   10.273  
Number of obs: 157, groups:  region, 7

Fixed effects:
                               Estimate Std. Error t value
(Intercept)                    -7.60650   10.82151  -0.703
majoritarian_dummyMajoritarian -5.74595    1.92982  -2.977
wlfp                            0.22878    0.08766   2.610
log_gdp_pc_ppp                  1.71625    1.04148   1.648
v2lgqugen                       2.16646    0.58354   3.713

Correlation of Fixed Effects:
            (Intr) mjrt_M wlfp   lg_g__
mjrtrn_dmmM -0.095                     
wlfp        -0.328 -0.066              
lg_gdp_pc_p -0.816  0.061 -0.245       
v2lgqugen   -0.268  0.260  0.156  0.100

The random effects terms can be accessed separately. These values should correspond to the average difference between the group-specific intercept vs. the overall intercept term:

ranef(model_2)
$region
                                 (Intercept)
Eastern Europe                    -0.9922269
Latin America and the Caribbean    2.8991079
The Middle East and North Africa  -5.2553624
Sub-Saharan Africa                 2.5144694
Western Europe and North America   4.8133411
East Asia and the Pacific         -2.7656509
South and Central Asia            -1.2136781

with conditional variances for "region" 

Note that we don’t automatically get standard errors for these estimates. There are a number of methods for getting confidence intervals around a random effect. The estimate_grouplevels from the modelbased package is an easy one:

How important are the random effects for our model? One way to quantify this is using the intra-class correlation coefficient, which can be interpreted as the proportion of the total variance attributable to the grouping variable:

performance::icc(model_2)

Now we can summarize our results in a single table:

model_list<-list("OLS" = model_0, "Region FE" = model_1, "Region RE" =model_2)

modelsummary(model_list, 
             coef_rename=TRUE,
             coef_omit = "Intercept",
             estimate  = "{estimate}",  
             statistic = c("conf.int"),
             conf_level = .95,        
             note = "95% CI in brackets",
             gof_omit = 'F|RMSE|R2$|AIC|Log.Lik.|R2 Marg|R2 Cond|R2 Within',
             )
tinytable_tfcxieo0jkb4kuci3ivn
OLS Region FE Region RE
95% CI in brackets
Lower chamber electoral system [Majoritarian] -6.195 -5.449 -5.746
[-10.102, -2.289] [-10.782, -0.116] [-9.559, -1.933]
Female labor force participation 0.304 0.195 0.229
[0.132, 0.477] [-0.008, 0.398] [0.056, 0.402]
Logged GDP per capita, PPP 1.316 1.736 1.716
[-0.288, 2.921] [-0.980, 4.451] [-0.342, 3.774]
Lower chamber gender quota 2.332 2.129 2.166
[1.140, 3.525] [0.833, 3.426] [1.013, 3.319]
SD (Observations) 10.273
Num.Obs. 157 157 157
R2 Adj. 0.229 0.307
BIC 1219.2 1221.3 1212.3
ICC 0.1
Std.Errors by: region

And/Or plot the coefficients

modelplot(model_list,
          coef_rename=TRUE,
          coef_omit = c(1)
          ) +
  scale_color_brewer(palette='Dark2') +
  geom_vline(xintercept=0, lty=2)

In contrast to the fixed effects model, which allows each region to have a totally separate Y-intercept, the random effects model “pools” the estimated intercepts across each group and shrinks them toward the global mean. The result is that the differences in the intercept terms are less dramatic, especially for smaller groups where there’s less data.

raneff_slopes<-predict_response(model_2, terms=c('majoritarian_dummy','region'), 
                                margin='empirical')|>
  plot( ci=FALSE, show_data=TRUE, connect_lines=TRUE, jitter=TRUE) +
  ggtitle("Region Random Effects") +
  scale_color_brewer(palette='Dark2')


fixef_slopes / raneff_slopes +
    plot_layout(guides = "collect")